
/*================================================================================*/
/* FTSE GREEN REVENUE SAMPLES */
/* Date: 12 June 2023 */
/* Data needed: FTSE Russel Green Revenue data 
				(a) cross-sectional data: "Bulk4July2018.xls", 
				(b) panel data with sub-segment information: "File_A.xlsx", "File_B.xlsx", "File_C.xlsx", 
				(c) SIC industry codes: "ControlVars_Orbis_WS_MASTER.dta" */
/*================================================================================*/

install ssc mipolate 

global raw "Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-21-2022\Replication_files"

cd "Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-21-2022\Replication_files"


log using "Z:\CLEAN_INNOVATION\archives\Kruse_T\Other\Construct_Master_dataset\OneDrive_1_10-21-2022\Replication_files\Log_file\1_ftsedata_creation.log", replace
/*=======================================*/
/* FTSE Russel Green Revenue data */
/*=======================================*/
// Prepare Green Revenue variables


// 1) import FTSE bulk data (cross-sectional)
clear all 
import excel "Bulk4July2018.xls", sheet("Sheet") cellrange(A2:P16269) firstrow

// Generate Dummy-variables for type of GR-estimate
gen SpecEstim = 0 
replace SpecEstim = 1 if EstimateType=="Specific Estimate"
 
gen QuantEstim = 0
replace QuantEstim = 1 if EstimateType=="Quantitative Estimate"
 
gen DisclEstim = 0
replace DisclEstim = 1 if EstimateType=="Disclosed"
 
gen EstimInfoNA = 0 
replace EstimInfoNA = 1 if EstimateType==""

//expand the FTSE-bulk dataset so that have 9 observations per firm, to prepare data for the panel structure of the Subsegment data.
expand 9
sort SEDOL
 
//Panel variable: SEDOL_num (strongly balanced)
//Time variable: Year, 2009 to 2017
bysort SEDOL: gen Year=2008 + _n
encode SEDOL, gen(SEDOL_num)
xtset SEDOL_num Year

save "${raw}Replication_BulkFTSE.dta", replace

// 2) Prepare sub-segment data which contains the detailed sub-segment Green Revenue data // 
// the sub-segment green revenue data is divided into three files (File_A, File_B, File_C)
clear all
import excel "File_A.xlsx", sheet("Blatt1") firstrow clear
save "${raw}File_A.dta", replace

clear all
import excel "File_B.xlsx", sheet("Blatt1") firstrow
save "${raw}File_B.dta", replace

clear all
import excel "File_C.xlsx", sheet("Blatt1") firstrow clear
save "${raw}File_C.dta", replace

* append all datasets
clear all
use "${raw}File_A.dta"
append using "${raw}File_B"
append using "${raw}File_C"
 
generate Year = substr(Title, 3, .) 
order Year, after(Title)
 
destring Year, generate(YearNum) force 
drop Year
rename YearNum Year
drop if missing(Year) 

sort SEDOL Year

save "${raw}Replication_New_FTSE_Data_appended.dta", replace

// 3) Merge FTSE bulk to FTSE subsegment data 
* In the detailed subsegment data we have multiple observations per company, 
* both due to multiple subsegments as well as due to multiple years. 

use "${raw}Replication_BulkFTSE.dta", clear
mmerge SEDOL Year using "${raw}Replication_New_FTSE_Data_appended", type(1:n) unmatched(both) missing(nomatch) 

// rename variables // 
rename R SubSegRevShare
rename Revenue SubSegRevLCU
rename Segment_Revenue Segment_RevenueLCU_unscaled

* Manually replace  subsegment revenue shares for observations with corrected values on the online FTSE platform*
replace SubSegRevShare=0.6705 if (Company=="Cia Paranaense de Energia Copel Pref B" & Year==2010 & SubSegment=="GeT_Hydro")
replace SubSegRevShare=0.9820 if (Company=="Menicon" & Year==2016 & Segment=="Contact Lens" & SubSegment=="Contact Lens")

* compute firm-level revenue 
egen Tag_SEDOL_Segment_Year = tag(SEDOL Segment Year)
bysort SEDOL Year: egen Company_Rev_LCU_unscaled = total(Segment_RevenueLCU_unscaled) if Tag_SEDOL_Segment_Year==1
egen long SEDOLNum = group(SEDOL)
bysort SEDOLNum Year: mipolate Company_Rev_LCU_unscaled SEDOLNum, generate(Company_Rev_LCU_unscaledMipl) groupwise

* Create Segment_Revenue_Share
gen Segment_Revenue_Share = Segment_RevenueLCU_unscaled/Company_Rev_LCU_unscaledMipl

* Generate dummy variable to indicate if an observation is green or non-green.
* If the GR_SubSector is empty-string (""), it is a non-green subsegment
generate GreenSubSectDum =0
replace GreenSubSectDum = 1 if GR_SubSector !="" 

* Generate dummy indicating if all subsegments within a particular segment are green, i.e. there is no subsegment non-green, the min of GreenSubSectDum==1.
egen AllSegGreenDum = min(GreenSubSectDum==1), by(SEDOL Year Segment)

* Generate dummy indicating if at least one Subsegment within a segment is green, meaning that not all subsegments are allowed to be non-green. 
egen Atleast1GreenDum = max(GreenSubSectDum==1), by(SEDOL Segment Year)

// 5) Constructe MinGR variable (3 possible cases)

* Case 1: All subsegments in a segment are green.
bysort SEDOL Segment Year: gen MinGreenShare1 = Segment_Revenue_Share if AllSegGreenDum==1
replace MinGreenShare1 = 0 if MinGreenShare1==.

** Green Revenue Share is coded as 0 non-green revenue.
gen SubSegNonGRShare = SubSegRevShare if GreenSubSectDum==0
egen NonGRShareSegment = total(SubSegNonGRShare), by(SEDOL Segment Year) 
replace NonGRShareSegment = 0 if NonGRShareSegment==.

* Case 2: Not all subsegments are green. 
egen MinNonGRSharewithinSegment = min(SubSegNonGRShare), by(SEDOL Segment Year)
bysort SEDOL Segment Year: gen MinGreenShare2 = (Segment_Revenue_Share - NonGRShareSegment) if (AllSegGreenDum==0 & MinNonGRSharewithinSegment!=0 & Atleast1GreenDum==1)
replace MinGreenShare2=0 if MinGreenShare2==.

* Case 3: At least one green subsegment exists
egen MinSegmentRevenueShare = min(SubSegRevShare), by(SEDOL Segment Year)
replace MinSegmentRevenueShare = 0 if MinSegmentRevenueShare==.
bysort SEDOL Segment Year: gen MinGreenShare3 = SubSegRevShare if GreenSubSect==1 & AllSegGreen==0 & MinSegmentRevenueShare==0 & Atleast1Green==1 & MinGreenShare2==0
replace MinGreenShare3=0 if MinGreenShare3==.
egen MinGreenShare3Segment = total(MinGreenShare3), by(SEDOL Segment Year)

* Sum GRShare1 & GRShare2 & GRShare3Segment for each observation:
gen MinGreenShareSum = MinGreenShare1 + MinGreenShare2 + MinGreenShare3Segment 

* Tag MinGreenShareSum by SEDOL Segment Year
egen MinGRShareTotal_tag = total(MinGreenShareSum) if Tag_SEDOL_Segment_Year==1, by(SEDOL Year)
bysort SEDOL Segment Year: mipolate MinGRShareTotal_tag SEDOLNum, gen(MinGRShareTotal) groupwise
replace MinGRShareTotal = 0 if MinGRShareTotal==.

* Correct for rounding errors in the raw data where the sum of the green revenue share is above 1, and for green revenue shares less than zero due to rounding errors in the raw data 
gen MinGRShareTotalNonNeg = MinGRShareTotal 
replace MinGRShareTotalNonNeg=1 if MinGRShareTotalNonNeg>1 & MinGRShareTotalNonNeg!=.  
replace MinGRShareTotalNonNeg=0 if MinGRShareTotalNonNeg<0 
gen MinNonGRShareTotal = 1- MinGRShareTotalNonNeg

* Generate dummy if a firm-year observation has a strictly positive green revenue 
gen MinGR_yes = 0
replace MinGR_yes = 1 if (MinGRShareTotalNonNeg>0 & MinGRShareTotalNonNeg<=1)

egen long isincode = group(ISIN)

// Merge SIC (2- and 3-digit) industry codes downloaded from Worldscope
mmerge ISIN Year using "ControlVars_Orbis_WS_MASTER", type(n:1)  missing(nomatch) // Merge SIC (2- and 3-digit) industry codes downloaded from Worldscope

gen SIC_2digit = substr(USSICprimarycodes, 1,2)
gen SIC_3digit = substr(USSICprimarycodes, 1,3)

destring SIC_2digit, gen(SIC_2digitNum)
destring SIC_3digit, gen(SIC_3digitNum)

// generate 2-digit SIC1 // 
tostring SIC1, generate(SIC1_str)
gen SIC1_2dgts = substr(SIC1_str,1,2)
destring SIC1_2dgts, gen(SIC1_2dgtsNum)

// generate 3-digit SIC1 //
gen SIC1_3dgts = substr(SIC1_str,1,3)
destring SIC1_3dgts, gen(SIC1_3dgtsNum)
bysort ISIN Year: gen firm_n= _n


/*========================================*/
/* Create samples of top green firms */
/*========================================*/

// Portfolios of firms across green revenue percentiles
xtile XT10_US_MinGRShareTotalNonNeg = MinGRShareTotalNonNeg if (Year==2013 & MinGRShareTotalNonNeg>0 & MinGRShareTotalNonNeg!=. & CountryName=="United States" & firm_n==1), nquantiles(10)

// MAIN SAMPLE
export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw}US_MinGR_top8Xtile.xlsx" if (XT10_US_MinGRShareTotalNonNeg==8 | XT10_US_MinGRShareTotalNonNeg==9 | XT10_US_MinGRShareTotalNonNeg==10) & Year==2013 & CountryName=="United States" & firm_n==1, firstrow(variables) replace

// ONLY GREEN REVENUE
export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw}US_MinGR_eq1.xlsx" if MinGRShareTotalNonNeg==1 & Year==2013 & CountryName=="United States" & firm_n==1, firstrow(variables) replace

// TOP 40% GREEN REVENUE
export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw}US_MinGR_top40pc.xlsx" if (XT10_US_MinGRShareTotalNonNeg==7 | XT10_US_MinGRShareTotalNonNeg==8 | XT10_US_MinGRShareTotalNonNeg==9 | XT10_US_MinGRShareTotalNonNeg==10) & Year==2013 & CountryName=="United States" & firm_n==1, firstrow(variables) replace

// MEDIAN GREN FIRMS			   
export excel isincode MinGRShareTotalNonNeg XT10_US_MinGRShareTotalNonNeg using "${raw}US_MinGR_top5Xtile.xlsx" if (XT10_US_MinGRShareTotalNonNeg==5) & Year==2013 & CountryName=="United States" & firm_n==1, firstrow(variables) replace

// FIRMS SOME GREEN REVENUE BETWEEN 2009 AND 2013
* Generate dummy indicator, taking value of 1 if between 2009-13, the MinGR is at least once strictly positive
bysort isincode: gen MinGR_yes0913 = MinGR_yes if Year>=2009 & Year<=2013 & firm_n==1
bysort isincode: egen MinGR_yes0913full = max(MinGR_yes0913)
export excel isincode MinGR_yes0913full MinGRShareTotalNonNeg CountryName using "${raw}US_MinGR_yes0913.xlsx" if CountryName=="United States" & MinGR_yes0913full==1 & Year==2009 & firm_n==1, firstrow(variables) replace

log close
